from pydantic import BaseModel
from unstructured.partition.html import partition_html
import pandas as pd

# Set every pandas display limit used by this script to None so that printed
# frames are not cut off by the default row/column/width settings.
for _display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.width",
    "display.max_colwidth",
):
    pd.set_option(_display_option, None)

from llama_index.readers.file import FlatReader
from pathlib import Path

# Load the 2021 filing from the working directory with a FlatReader.
# NOTE: the 2020 filing ("tesla_2020_10k.htm") is currently not loaded.
filing_2021_path = Path("tesla_2021_10k.htm")
reader = FlatReader()
docs_2021 = reader.load_data(filing_2021_path)

from llama_index.core.node_parser import UnstructuredElementNodeParser
from llama_index.core.schema import IndexNode

import os
import pickle

# Turn the loaded 2021 documents into raw nodes and dump them to stdout for
# inspection.
node_parser = UnstructuredElementNodeParser()
raw_nodes_2021 = node_parser.get_nodes_from_documents(documents=docs_2021)
print(raw_nodes_2021)

# Split the raw nodes into base nodes plus a mapping keyed by index id.
# The parser imported and constructed earlier in this script is reused here;
# building a second, identical UnstructuredElementNodeParser was redundant.
base_nodes_2021, node_mappings_2021 = node_parser.get_base_nodes_and_mappings(
    raw_nodes_2021
)

# Keep only the IndexNode entries among the base nodes.
nodes = [node for node in base_nodes_2021 if isinstance(node, IndexNode)]

# Report how many IndexNode entries were found.
print(len(nodes))

# Position of the IndexNode shown as an example. A filing that yields fewer
# IndexNode entries than this would make a plain ``nodes[20]`` raise
# IndexError, so the lookup is clamped to the last available entry and the
# display is skipped entirely when there are none.
EXAMPLE_NODE_POSITION = 20

example_index_node = (
    nodes[min(EXAMPLE_NODE_POSITION, len(nodes) - 1)] if nodes else None
)


print("OKKKKKKKKKKKKKKKKK")

if example_index_node is None:
    print("No IndexNode entries available; nothing to display.")
else:
    # Index Node
    print(
        f"\n--------\n{example_index_node.get_content(metadata_mode='all')}\n--------\n"
    )
    # Index Node ID
    print(f"\n--------\nIndex ID: {example_index_node.index_id}\n--------\n")
    # Referenced Table
    print(
        f"\n--------\n{node_mappings_2021[example_index_node.index_id].get_content()}\n--------\n"
    )

from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import VectorStoreIndex

# Index the 2021 base nodes, then derive a retriever and a query engine from
# the same index; both are configured with similarity_top_k=1.
_top_k_options = {"similarity_top_k": 1}
vector_index = VectorStoreIndex(nodes=base_nodes_2021)
vector_retriever = vector_index.as_retriever(**_top_k_options)
vector_query_engine = vector_index.as_query_engine(**_top_k_options)